---
title: "Party System Stability"
output:
  html_document:
    df_print: paged
    toc: true
    toc_depth: 2
---

# Preparing the data on programmatic stability

Load the packages, read the data, and select the variables needed to create the index of programmatic change.

The `poldem-election_all.dta` file can be downloaded from: https://poldem.eui.eu/download/election-campaigns/

```{r message=FALSE, warning=FALSE}
# Start from an empty workspace: removes every object, including hidden ones.
rm(list=ls(all=TRUE))
# Set the LANG environment variable to English for this session.
Sys.setenv(LANG = "en")
# Large penalty on scientific notation, so numbers print in fixed notation.
options(scipen=999)

library(tidyverse)
library(haven)
library(tidyselect)
library(dplyr)
library(readxl)
library(lubridate)
library(janitor)
library(ggplot2)
library(ggrepel)
library(cowplot)

# Root folder of the project; the input and output paths below are built from it.
path <- "C:\\users\\borbath\\box.fu\\Papers_not_with_Swen\\Eastern_Europe_cs\\empirical_analysis\\"

dat <- read_dta(paste0(path, "data\\online\\poldem-election_all.dta")) %>%
  # Keep rows with an issue code of at most 900, source_type 1, a known party
  # and type_cs 1 (author's note: this drops missing parties, a-a sentences
  # and issues without direction).
  filter(issue_cat <= 900,
         source_type == 1,
         !is.na(party_id),
         type_cs == 1) %>%
  # Elections included in the analysis: countries sharing the same cut-off
  # are grouped; countries 7, 8, 10, 11, 13, 14 and 15 enter with all elections.
  filter((country %in% c(1, 5) & year_election >= 2006) |
           (country %in% c(2, 4) & year_election >= 2005) |
           (country %in% c(3, 6) & year_election >= 2007) |
           (country %in% c(9, 12) & year_election != 2018) |
           country %in% c(7, 8, 10, 11, 13, 14, 15)) %>%
  # Election identifier (e.g. "<iso2>_<year>") and the issue label as plain text.
  mutate(country_year = paste0(iso2code, "_", year_election),
         issue_string = as.character(as_factor(issue_cat))) %>%
  select(country, iso2code, election_date, country_year,
         year_election, party_id, subject_party, party_name,
         party_name_short, issue_string, direction, issue_cat)
```

Next, calculate the two weights needed later for the index:

1. standing of parties (perc_party)
2. salience of issues (perc_issue)

```{r, warning=FALSE, message=FALSE}
# Attach the cell counts to every row, then express party standing and issue
# salience as shares of all observations in the same election.
dat <- dat %>%
  add_count(country_year, issue_cat, party_id, name = "count_issuebyparty") %>%
  add_count(country_year, issue_cat, name = "count_issue") %>%
  add_count(country_year, name = "count_total") %>%
  add_count(country_year, party_id, name = "count_party") %>%
  # Both shares are plain row-wise ratios, so no grouping is needed here.
  mutate(perc_party = count_party / count_total,
         perc_issue = count_issue / count_total)
  # filter(count_party>30 & count_issue>50) # this threshold sets how salient an issue and large a party has to be in order to be included.
```

Now collapse the data to the party-issue level:

```{r, warning=FALSE, message=FALSE}
# Collapse to one row per election, party and issue, carrying the party's mean
# position on the issue and the share of its statements devoted to it.
dat <- dat %>%
  select(country_year, country, iso2code, direction,
         year_election, party_id, party_name_short,
         perc_party, issue_string, issue_cat, perc_issue,
         matches("^count_")) %>%
  group_by(country_year, party_id, issue_cat) %>%
  mutate(party_pos = mean(direction, na.rm = TRUE),          # position
         party_sal = count_issuebyparty / count_party) %>%   # salience
  ungroup() %>%
  # Without the sentence-level direction the rows of a cell are identical,
  # so de-duplicating leaves one row per cell.
  select(-direction) %>%
  distinct()

```

## Missing data

First, build a grid that contains all the necessary combinations, and merge the observed data onto it:

```{r, warning=FALSE, message=FALSE}

# Problem nr. 1: create a balanced dataset with every party of a country in
# each of that country's election years.

# Cross every election with every party (as if all parties were present in all
# country years). Only the unique values are crossed: crossing the two columns
# of the distinct (country_year, party_id) pair table instead yields
# (number of pairs)^2 rows, almost all of them duplicates that would be carried
# through the merges below. stringsAsFactors = TRUE is the expand.grid()
# default and is spelled out because country_year is a factor from here on.
ballanced <- expand.grid(country_year = unique(dat$country_year),
                         party_id = unique(dat$party_id),
                         stringsAsFactors = TRUE)

# Attach the country code of each election.
iso2_to_grid <- dat %>% 
  select(iso2code, country_year) %>% 
  distinct(.)

ballanced <- merge(ballanced, iso2_to_grid, by = "country_year")
rm(iso2_to_grid)

# Get rid of parties which are from a different country: keep only the
# (iso2code, party_id) combinations that occur in the data.
party_list_by_country <- dat %>% 
  group_by(iso2code, party_id) %>% 
  summarise()

ballanced <- merge(party_list_by_country, ballanced, by = c("iso2code", "party_id"))

ballanced <- ballanced %>% distinct(.)
ballanced <- ballanced[order(ballanced$country_year), ]

# To test if it went well: the number of distinct parties per election should
# be the same for all elections of a country.
test <- ballanced %>% 
  group_by(country_year) %>% 
  mutate(test=n_distinct(party_id))

# table(test$iso2code, test$test) # the values are unique by country, it went well. 
rm(test) # We now have a grid for parties in all country years with the parties from the respective country

# Problem nr. 2: balance the grid for issues as well (all parties taking a
# stance on all issues).

country_lev_issues <- dat %>% 
  select(issue_string, iso2code) %>% 
  distinct(.)

# table(country_lev_issues$issue_string, country_lev_issues$iso2code) # immig. was never a topic in RO, the rest works well.
# will take immig out of RO later on.

rm(country_lev_issues)

# Cross every party with every issue. Only the unique values are crossed:
# crossing the two columns of the distinct (party_id, issue_cat) pair table
# instead builds (number of pairs)^2 rows before de-duplication, which is
# needlessly slow and memory hungry. With unique inputs the grid has no
# duplicate rows, so no distinct() is needed afterwards.
ball_issues <- expand.grid(party_id = unique(dat$party_id),
                           issue_cat = unique(dat$issue_cat))

# The country code is needed in order to delete immigration from Romania.
ball_issues <- merge(ball_issues, party_list_by_country, by = "party_id")

# Unite the party-by-issue grid with the party-by-election grid.
ballanced <- merge(ball_issues, ballanced, by = c("party_id", "iso2code"), all = TRUE)
rm(ball_issues)

ballanced <- ballanced %>% filter(!(iso2code=="RO" & issue_cat==120)) # Immigration goes out from RO (see above)

# Left merge of the observed data onto the grid: grid cells without
# observations keep NA in every column that comes from dat.
ballanced <- merge(ballanced, dat,
                   by = c("party_id", "iso2code", "issue_cat", "country_year"),
                   all.x = TRUE)

# Grid cells without observations have NA in the columns merged in from dat.
# Fill each column from the observed rows of the group within which it is
# constant.
# - unique(na.omit(x)) only works if the group has exactly one observed value
#   (or as many as it has rows); otherwise mutate() fails.
# - max(na.omit(x)) returns -Inf (with a warning) when a group has no observed
#   value at all; the final mutate_all() turns those -Inf back into NA.
# NOTE(review): party_name_short is assumed to be constant within
# (country, party_id) -- confirm no party changes its short name across elections.
ballanced <- ballanced %>% 
  # election-level constants
  group_by(country_year) %>% 
  mutate(country=unique(na.omit(country)),
         year_election=unique(na.omit(year_election)),
         count_total=unique(na.omit(count_total))) %>% 
  # party name
  group_by(country, party_id) %>% 
  mutate(party_name_short=unique(na.omit(party_name_short))) %>% 
  # party-in-election values
  group_by(country_year, party_id) %>%
  mutate(perc_party=max(na.omit(perc_party)),
         count_party=max(na.omit(count_party))) %>% 
  # issue label
  group_by(issue_cat) %>% 
  mutate(issue_string=unique(na.omit(issue_string))) %>% 
  # issue-in-election values
  group_by(country_year, issue_cat) %>% 
  mutate(perc_issue=max(na.omit(perc_issue)),
         count_issue=max(na.omit(count_issue))) %>% 
  # cell-level values (party x issue x election)
  group_by(country_year, party_id, issue_cat) %>% 
  mutate(count_issuebyparty=max(na.omit(count_issuebyparty)),
         party_pos=max(na.omit(party_pos)),
         party_sal=max(na.omit(party_sal))) %>% 
  # applied to every non-grouping column: -Inf (no observed value) becomes NA
  mutate_all(function(x) ifelse(x==-Inf,NA,x))

# to simplify things, I assume volatility B and then change it for parties which are not B.
# Every row starts as "Type B"; rows of a country's earliest election year are
# labelled "First Election".
ballanced <- ballanced %>% 
  ungroup(.) %>% 
  arrange(country, year_election, perc_party, perc_issue, .by_group = FALSE)  %>% 
  mutate(type = "Type B") %>% 
  group_by(country) %>% 
  mutate(type=ifelse(year_election==min(year_election, na.rm=TRUE), "First Election", type)) %>% 
  ungroup(.)

# The party list was only needed to build the grid. It used to be merged with
# the party names and sorted right before being deleted; that result was never
# used, so the object is simply removed.
rm(party_list_by_country)

```

Now, let's determine which type of programmatic volatility is calculated for each party in each election:

```{r, warning=FALSE, message=FALSE}
# parties

# Hand-coded corrections of the party grid, applied in this order:
#   1. drop the listed (country_year, party_id) cells from the grid;
#   2. override `type` for the listed cells (all others keep their current type);
#   3. drop three cells whose party_id is reused as a recode target in step 4;
#   4. recode party_id and party_name_short so that selected parties can be
#      traced across elections;
#   5. drop the remaining Italian rows of the two recoded party ids.
# NOTE(review): the list in step 1 appears to cover both elections before a
# party entered and elections after it left -- confirm against the party data.
ballanced <- ballanced %>% 
  # Step 1: cells removed from the grid.
  filter(!((country_year=="AU_2006" & party_id==1746) | # these parties were not yet around at the time
             (country_year=="AU_2006" & party_id==2150) | # we miss the KPO in AU
             (country_year=="AU_2006" & party_id==2255) |
             (country_year=="AU_2006" & party_id==2651) |
             (country_year=="AU_2008" & party_id==2150) |
             (country_year=="AU_2008" & party_id==2651) |
             (country_year=="AU_2008" & party_id==669) |
             (country_year=="AU_2013" & party_id==1746) | 
             (country_year=="AU_2013" & party_id==669) |
             (country_year=="AU_2013" & party_id==955) |
             (country_year=="AU_2017" & party_id==669) |
             (country_year=="AU_2017" & party_id==1746) |
             (country_year=="AU_2017" & party_id==955) |
             (country_year=="AU_2017" & party_id==2150) |
             (country_year=="AU_2017" & party_id==1536) |
             (country_year=="DE_2005" & party_id==2253) |
             (country_year=="DE_2009" & party_id==2253) | # we miss the Pirates in DE
             (country_year=="FR_2007" & party_id==2273) | 
             (country_year=="FR_2007" & party_id==2643) |
             (country_year=="FR_2007" & party_id==2644) |
             (country_year=="FR_2007" & party_id==2734) |
             (country_year=="FR_2012" & party_id==1101) |
             (country_year=="FR_2017" & party_id==1101) |
             (country_year=="FR_2017" & party_id==401) |
             (country_year=="IE_2011" & party_id==651) | 
             (country_year=="IE_2016" & party_id==651) | 
             (country_year=="IE_2007" & party_id==2619) |
             (country_year=="IE_2007" & party_id==2620) |
             (country_year=="IE_2007" & party_id==2622) |
             (country_year=="IE_2011" & party_id==2620) |
             (country_year=="IE_2011" & party_id==2622) |
             (country_year=="IE_2016" & party_id==1804) |
             (country_year=="IE_2016" & party_id==30) | # we miss EVP, LPS and EDU in CH_2007
             (country_year=="NL_2010" & party_id==2109) |
             (country_year=="NL_2006" & party_id==2109) |
             (country_year=="NL_2006" & party_id==2641) |
             (country_year=="NL_2010" & party_id==2641) |
             (country_year=="ES_2004" & party_id==551) |
             (country_year=="ES_2004" & party_id==2375) |
             (country_year=="ES_2008" & party_id==2375) |
             (country_year=="ES_2011" & party_id==2375) |
             (country_year=="ES_2004" & party_id==2376) | 
             (country_year=="ES_2008" & party_id==2376) |
             (country_year=="ES_2011" & party_id==2376) |
             (country_year=="ES_2004" & party_id==2606) |
             (country_year=="ES_2008" & party_id==2606) |
             (country_year=="ES_2011" & party_id==2606) |
             (country_year=="ES_2015" & party_id==894) |
             (country_year=="ES_2016" & party_id==894) |
             (country_year=="ES_2004" & party_id==2792) | 
             (country_year=="ES_2008" & party_id==2792) | # we miss CUP before 2015
             (country_year=="GR_2007" & party_id==2091) |
             (country_year=="GR_2007" & party_id==2092) |
             (country_year=="GR_2009" & party_id==2092) |
             (country_year=="GR_2007" & party_id==2093) |
             (country_year=="GR_2009" & party_id==2093) |
             (country_year=="GR_2015.5" & party_id==2093)|
             (country_year=="GR_2007" & party_id==2094) |
             (country_year=="GR_2012.5" & party_id==2094) |
             (country_year=="GR_2015" & party_id==2094) |
             (country_year=="GR_2015.5" & party_id==2094) |
             (country_year=="GR_2007" & party_id==2095) |
             (country_year=="GR_2009" & party_id==2095) |
             (country_year=="GR_2007" & party_id==2346) |
             (country_year=="GR_2009" & party_id==2346) |
             (country_year=="GR_2012" & party_id==2346) |
             (country_year=="GR_2012.5" & party_id==2346) | 
             (country_year=="GR_2007" & party_id==2407) |
             (country_year=="GR_2009" & party_id==2407) |
             (country_year=="GR_2012" & party_id==2407) |
             (country_year=="GR_2015.5" & party_id==2407) |
             (country_year=="GR_2007" & party_id==2596) |
             (country_year=="GR_2009" & party_id==2596) |
             (country_year=="GR_2012" & party_id==2596) |
             (country_year=="GR_2012.5" & party_id==2596) |
             (country_year=="GR_2009" & party_id==2124) |
             (country_year=="GR_2012" & party_id==2124) |
             (country_year=="GR_2012.5" & party_id==2124) |
             (country_year=="GR_2015" & party_id==2124) |
             (country_year=="GR_2015.5" & party_id==2124) |
             (country_year=="IT_2008" & party_id==373) |
             (country_year=="IT_2013" & party_id==373) |
             (country_year=="IT_2006" & party_id==2154) |
             (country_year=="IT_2008" & party_id==2154) |
             (country_year=="IT_2006" & party_id==2155) |
             (country_year=="IT_2008" & party_id==2155) |
             (country_year=="IT_2006" & party_id==2156) |
             (country_year=="IT_2008" & party_id==2156) |
             (country_year=="IT_2006" & party_id==2158) |
             (country_year=="IT_2008" & party_id==2158) |
             (country_year=="IT_2006" & party_id==465) |
             (country_year=="IT_2008" & party_id==465) |
             (country_year=="LV_2014" & party_id==662) |
             (country_year=="LV_2010" & party_id==801) |
             (country_year=="LV_2011" & party_id==801) |
             (country_year=="LV_2014" & party_id==801) |
             (country_year=="LV_2011" & party_id==811) |
             (country_year=="LV_2014" & party_id==811) |
             (country_year=="LV_2010" & party_id==1518) |
             (country_year=="LV_2011" & party_id==1518) |
             (country_year=="LV_2014" & party_id==1518) |
             (country_year=="LV_2006" & party_id==1942) |
             (country_year=="LV_2010" & party_id==1942) |
             (country_year=="LV_2006" & party_id==2365) |
             (country_year=="LV_2010" & party_id==2365) |
             (country_year=="LV_2011" & party_id==2365) |
             (country_year=="LV_2006" & party_id==2366) |
             (country_year=="LV_2010" & party_id==2366) |
             (country_year=="LV_2011" & party_id==2366) |
             (country_year=="LV_2006" & party_id==1667) |
             (country_year=="LV_2011" & party_id==1667) |
             (country_year=="LV_2014" & party_id==1667) |
             (country_year=="LV_2006" & party_id==2367) |
             (country_year=="LV_2010" & party_id==2367) |
             (country_year=="LV_2006" & party_id==2717) |
             (country_year=="LV_2010" & party_id==2717) |
             (country_year=="LV_2011" & party_id==2717) |
             (country_year=="LV_2014" & party_id==1656) |
             (country_year=="PL_2011" & party_id==180) |
             (country_year=="PL_2015" & party_id==180) |
             (country_year=="PL_2011" & party_id==987) |
             (country_year=="PL_2015" & party_id==987) |
             (country_year=="PL_2015" & party_id==1679) |
             (country_year=="PL_2007" & party_id==2600) |
             (country_year=="PL_2011" & party_id==2600) |
             (country_year=="PL_2007" & party_id==2601) |
             (country_year=="PL_2011" & party_id==2601) |
             (country_year=="PL_2007" & party_id==2625) |
             (country_year=="PL_2011" & party_id==2625) |
             (country_year=="HU_2010" & party_id==95) | 
             (country_year=="HU_2014" & party_id==95) | 
             (country_year=="HU_2006" & party_id==403) |
             (country_year=="HU_2014" & party_id==546) |
             (country_year=="HU_2014" & party_id==1426) |
             (country_year=="HU_2006" & party_id==1970) |
             (country_year=="HU_2006" & party_id==2318) |
             (country_year=="HU_2010" & party_id==2659) | 
             (country_year=="HU_2014" & party_id==2659) | 
             (country_year=="HU_2006" & party_id==2659) |
             (country_year=="HU_2010" & party_id==2660) | 
             (country_year=="HU_2014" & party_id==2660) | 
             (country_year=="HU_2006" & party_id==2660) |
             (country_year=="RO_2016" & party_id==5) |
             (country_year=="RO_2004" & party_id==2647) |
             (country_year=="RO_2008" & party_id==2647) |
             (country_year=="RO_2008" & party_id==419) |
             (country_year=="RO_2012" & party_id==419) |
             (country_year=="RO_2016" & party_id==419) |
             (country_year=="RO_2016" & party_id==958) |
             (country_year=="RO_2008" & party_id==888) |
             (country_year=="RO_2012" & party_id==888) |
             (country_year=="RO_2016" & party_id==888) |
             (country_year=="RO_2004" & party_id==2130) |
             (country_year=="RO_2008" & party_id==2130) |
             (country_year=="RO_2016" & party_id==2130) |
             (country_year=="RO_2004" & party_id==2395) |
             (country_year=="RO_2008" & party_id==2395) |
             (country_year=="RO_2004" & party_id==2646) |
             (country_year=="RO_2008" & party_id==2646) |
             (country_year=="RO_2012" & party_id==2646) |
             (country_year=="RO_2004" & party_id==2648) |
             (country_year=="RO_2008" & party_id==2648) |
             (country_year=="RO_2012" & party_id==2648)
             )) %>%
  # Step 2: type overrides. Labels used: "Type A (new)", "Last elec bfr dying",
  # or both combined; the final TRUE branch keeps the existing type.
  mutate(type=case_when((country_year=="AU_2006" & party_id==669) ~ "Last elec bfr dying",
                        (country_year=="AU_2008" & party_id==1746) ~ "Type A (new) & Last elec bfr dying",
                        (country_year=="AU_2008" & party_id==955) ~ "Last elec bfr dying",
                        (country_year=="AU_2013" & party_id==2150) ~ "Type A (new) & Last elec bfr dying",
                        (country_year=="AU_2013" & party_id==1536) ~ "Last elec bfr dying",
                        (country_year=="DE_2013" & party_id==2253) ~ "Type A (new)",
                        (country_year=="FR_2007" & party_id==1101) ~ "Last elec bfr dying",
                        (country_year=="IE_2007" & party_id==651) ~ "Last elec bfr dying",
                        (country_year=="IE_2011" & party_id==1804) ~ "Last elec bfr dying",
                        (country_year=="IE_2016" & party_id==2620) ~ "Type A (new)",
                        (country_year=="NL_2012" & party_id==2109) ~ "Type A (new)",
                        (country_year=="ES_2008" & party_id==551) ~ "Type A (new)",
                        (country_year=="ES_2015" & party_id==2375) ~ "Type A (new)",
                        (country_year=="ES_2015" & party_id==2376) ~ "Type A (new)",
                        (country_year=="ES_2011" & party_id==894) ~ "Last elec bfr dying",
                        (country_year=="GR_2012" & party_id==2092) ~ "Type A (new)",
                        (country_year=="GR_2015" & party_id==2093) ~ "Last elec bfr dying",
                        (country_year=="GR_2012" & party_id==2094) ~ "Last elec bfr dying",
                        (country_year=="GR_2012" & party_id==2095) ~ "Type A (new)",
                        (country_year=="GR_2015" & party_id==2346) ~ "Type A (new)",
                        (country_year=="GR_2015" & party_id==2407) ~ "Last elec bfr dying",
                        (country_year=="GR_2007" & party_id==2124) ~ "Last elec bfr dying",
                        (country_year=="GR_2015.5" & party_id==1179) ~ "Last elec bfr dying",
                        (country_year=="IT_2006" & party_id==373) ~ "Last elec bfr dying",
                        (country_year=="IT_2013" & party_id==2155) ~ "Type A (new)",
                        (country_year=="IT_2013" & party_id==2158) ~ "Type A (new) & Last elec bfr dying",
                        (country_year=="LV_2011" & party_id==662) ~ "Last elec bfr dying",
                        (country_year=="LV_2006" & party_id==801) ~ "Last elec bfr dying",
                        (country_year=="LV_2010" & party_id==811) ~ "Last elec bfr dying",
                        (country_year=="LV_2011" & party_id==1942) ~ "Type A (new)",
                        (country_year=="LV_2014" & party_id==2365) ~ "Type A (new)",
                        (country_year=="LV_2014" & party_id==2366) ~ "Type A (new)",
                        (country_year=="LV_2010" & party_id==1667) ~ "Type A (new) & Last elec bfr dying",
                        (country_year=="LV_2014" & party_id==2717) ~ "Type A (new)",
                        (country_year=="LV_2011" & party_id==1656) ~ "Last elec bfr dying",
                        (country_year=="PL_2007" & party_id==180) ~ "Last elec bfr dying",
                        (country_year=="PL_2007" & party_id==987) ~ "Last elec bfr dying",
                        (country_year=="PL_2011" & party_id==1679)  ~ "Last elec bfr dying",
                        (country_year=="PL_2015" & party_id==2600)  ~ "Type A (new)",
                        (country_year=="PL_2015" & party_id==2601)  ~ "Type A (new)",
                        (country_year=="PL_2015" & party_id==2625)  ~ "Type A (new)",
                        (country_year=="HU_2010" & party_id==95) ~ "Last elec bfr dying",
                        (country_year=="HU_2010" & party_id==403) ~ "Type A (new)",
                        (country_year=="HU_2010" & party_id==546) ~ "Last elec bfr dying",
                        (country_year=="HU_2006" & party_id==600) ~ "Type A (new)",
                        (country_year=="HU_2010" & party_id==1426) ~ "Last elec bfr dying",
                        (country_year=="RO_2012" & party_id==5) ~ "Last elec bfr dying",
                        (country_year=="RO_2004" & party_id==419) ~ "Last elec bfr dying",
                        (country_year=="RO_2012" & party_id==958) ~ "Last elec bfr dying",
                        (country_year=="RO_2004" & party_id==888) ~ "Last elec bfr dying",
                        (country_year=="RO_2012" & party_id==2130) ~ "Type A (new) & Last elec bfr dying",
                        (country_year=="RO_2016" & party_id==2646) ~ "Type A (new)",
                        (country_year=="RO_2016" & party_id==2648) ~ "Type A (new)",
                        TRUE ~ as.character(type))) %>% 
  # Step 3: remove the cells whose party_id (596, 382, 1666) is assigned to
  # another party's rows in the recode below, so the recode creates no duplicates.
  filter(!((country_year=="IT_2008" & party_id==596) | #this was a coding mistake, should be PDL
           (country_year=="IT_2006" & party_id==382) | # although the left had a coalition (CeS), I rename it as PD to be able to trace the party back to it. Given there are no delta scores for the first election, it does not affect the result
           (country_year=="LV_2006" & party_id==1666) # I comp. Unity to New Era
           )) %>% 
  # Step 4: recode ids and short names. party_id is recoded first; the name
  # conditions only use country_year and the old party_name_short, so they are
  # not affected by the new ids.
  mutate(party_id=case_when(
                        ((country_year=="IE_2011" | country_year=="IE_2007")
                         & party_id==30) ~ 2622, # IA is tied back to the group of indep. before
                        (country_year=="IT_2008" & party_id==67) ~ 596, 
                        (country_year=="IT_2006" & party_id==1048) ~ 382,
                        (country_year=="LV_2006" & party_id==1518) ~ 1666,
                        TRUE ~ as.numeric(party_id)),
         party_name_short=case_when(
                        ((country_year=="IE_2011" | country_year=="IE_2007")
                         & party_name_short=="none") ~ "IA", 
                        (country_year=="IT_2008" & party_name_short=="CeD") ~ "FI-PdL",
                        (country_year=="IT_2006" & party_name_short=="CeS") ~ "PD",
                        (country_year=="LV_2006" & party_name_short=="JL") ~ "V",
                        TRUE ~ as.character(party_name_short))) %>% 
  # Step 5: Italian rows that still carry party_id 67 or 1048 (i.e. those not
  # recoded in step 4) are dropped.
  filter(!((iso2code=="IT" & party_id==67) |
          (iso2code=="IT" & party_id==1048)))

```

Parties which ran but are not in the media:

```{r, warning=FALSE, message=FALSE}


# Parties which ran in the listed elections but are not in the media data:
# their standing, position, salience and counts are set to 0. All other cells
# keep their value, converted to numeric.
# The replacement rule is passed as a `~` lambda, in which `.` is the column
# being modified and the other columns can be referenced by name. `funs()`,
# used previously, is deprecated since dplyr 0.8.0.
ballanced <- ballanced %>% 
  mutate_at(.vars = vars(perc_party, party_pos, party_sal, count_issuebyparty, count_party),
            .funs = ~ case_when(((country_year=="UK_2005" & party_id==467) |
                (country_year=="UK_2005" & party_id==1272) |
                (country_year=="UK_2005" & party_id==1284) |
                (country_year=="UK_2010" & party_id==467) |
                (country_year=="UK_2010" & party_id==1272) |
                (country_year=="UK_2010" & party_id==1284) |
                (country_year=="UK_2017" & party_id==467) |
                (country_year=="FR_2012" & party_id==1492) |
                (country_year=="FR_2017" & party_id==2399) |
                (country_year=="DE_2013" & party_id==2253) |
                (country_year=="NL_2006" & party_id==990) |
                (country_year=="NL_2006" & party_id==1251) |
                (country_year=="NL_2010" & party_id==1251) |
                (country_year=="NL_2012" & party_id==990) |
                (country_year=="NL_2012" & party_id==1251) |
                (country_year=="NL_2012" & party_id==2109) |
                (country_year=="CH_2011" & party_id==1167) |
                (country_year=="CH_2011" & party_id==1500) |
                (country_year=="CH_2015" & party_id==1167) |
                (country_year=="CH_2015" & party_id==1500) |
                (country_year=="IE_2007" & party_id==1804) |
                (country_year=="GR_2007" & party_id==1280) |
                (country_year=="GR_2012" & party_id==1280) |
                (country_year=="GR_2012" & party_id==2095) |
                (country_year=="GR_2012.5" & party_id==1280) |
                (country_year=="GR_2012.5" & party_id==2092) |
                (country_year=="GR_2015" & party_id==1179) |
                (country_year=="GR_2015" & party_id==2092) |
                (country_year=="GR_2015" & party_id==2095) |
                (country_year=="GR_2015.5" & party_id==1179) |
                (country_year=="GR_2015.5" & party_id==2092) |
                (country_year=="GR_2015.5" & party_id==2095) |
                (country_year=="IT_2006" & party_id==176) |
                (country_year=="IT_2013" & party_id==176) |
                (country_year=="IT_2013" & party_id==1296) |
                (country_year=="ES_2004" & party_id==845) |
                (country_year=="ES_2008" & party_id==551) |
                (country_year=="ES_2011" & party_id==757) |
                (country_year=="ES_2011" & party_id==2792) |
                (country_year=="ES_2015" & party_id==551) |
                (country_year=="ES_2015" & party_id==845) |
                (country_year=="ES_2016" & party_id==551) |
                (country_year=="ES_2016" & party_id==845) |
                (country_year=="ES_2016" & party_id==1361) |
                (country_year=="LV_2006" & party_id==1520) |
                (country_year=="LV_2010" & party_id==239) |
                (country_year=="PL_2007" & party_id==1216) |
                (country_year=="PL_2011" & party_id==207) |
                (country_year=="PL_2011" & party_id==1216) |
                (country_year=="PL_2015" & party_id==207) |
                (country_year=="RO_2012" & party_id==713) |
                (country_year=="RO_2016" & party_id==713)) ~ 0,
            TRUE ~ as.numeric(.)))

# Filling in the zeros

# Returns `value` with its missing entries set to 0 on rows where `reference`
# is observed; all other entries (including NAs where `reference` is also
# missing) are passed through unchanged.
zero_where_observed <- function(value, reference) {
  ifelse(is.na(value) & !is.na(reference), 0, value)
}

ballanced <- ballanced %>%
  mutate(
    party_pos = zero_where_observed(party_pos, perc_party),
    party_sal = zero_where_observed(party_sal, perc_party),
    count_issuebyparty = zero_where_observed(count_issuebyparty, perc_party),
    count_party = zero_where_observed(count_party, perc_party),
    count_issue = zero_where_observed(count_issue, perc_party),
    perc_issue = zero_where_observed(perc_issue, perc_party)
  ) %>%
  group_by(iso2code, party_id, issue_cat) %>%
  arrange(year_election, .by_group = TRUE) %>%
  # Accounts for parties entering later than the first election of their
  # country without being a genuinely new party: the earliest row of each
  # party-issue series that is labelled "Type B" is relabelled.
  mutate(type = ifelse(type == "Type B" & row_number() == 1,
                       "First Election", type)) %>%
  ungroup() %>%
  arrange(iso2code, year_election, party_id, issue_cat)

# The helper is only needed for the step above.
rm(zero_where_observed)

# Store the balanced panel; it is read back in when the volatility scores are
# calculated.
write.csv(ballanced,
          file = paste0(path, "syntax/Harvard_dataverse/ballanced_dat.csv"),
          row.names = FALSE)

```

# Preparing the organizational dataset

```{r, warning=FALSE, message=FALSE}

dat <- read_excel(paste0(path, "syntax/March_2020/elections_parties_wide.xls"))

# Country name as spelled in the spreadsheet -> two-letter code used in
# country_year. Names missing from this table get NA.
# NOTE(review): Austria is coded "AU" here (not "AT"); the rest of the script
# keys on "AU_<year>", so the code must stay as it is.
iso2_lookup <- c(
  "France" = "FR",
  "Switzerland" = "CH",
  "United Kingdom" = "UK",
  "Latvia" = "LV",
  "Poland" = "PL",
  "Netherlands" = "NL",
  "Portugal" = "PT",
  "Germany" = "DE",
  "Hungary" = "HU",
  "Spain" = "ES",
  "Romania" = "RO",
  "Ireland" = "IE",
  "Austria" = "AU",
  "Italy" = "IT",
  "Greece" = "GR"
)

long.dat <- dat %>%
  # Wide -> long: columns named <variable><1-6> become one row per party and
  # election slot, with the slot number kept in long_id.
  pivot_longer(cols = el_date1:vote6,
               names_to = c(".value", "long_id"),
               names_pattern = "^(\\w+)([1-6])$") %>%
  mutate(
    new_prts = as.numeric(new == 1 | splinter == 1 |
                            merged_new_name == 1 | joint_new_name == 1),
    death_prts = as.numeric(disbanded == 1 | not_run == 1),
    vote = ifelse(is.na(vote), 0, vote)
  ) %>%
  group_by(country, persparty_id) %>%
  arrange(el_date, .by_group = TRUE) %>%
  mutate(
    prev_vote = lag(vote), # the same party's vote at its previous election
    new_vote = new_prts * vote,
    gnew_vote = new * vote,
    old_vote = death_prts * prev_vote,
    # The second Greek elections of 2012 (June) and 2015 (September) get a
    # half-year code so that they can be told apart from the first ones.
    year = case_when(
      country == "Greece" & year(el_date) == 2012 & month(el_date) == 6 ~ 2012.5,
      country == "Greece" & year(el_date) == 2015 & month(el_date) == 9 ~ 2015.5,
      TRUE ~ year(el_date)
    ),
    iso2code = unname(iso2_lookup[as.character(country)]),
    country_year = paste0(iso2code, "_", as.character(year))
  ) %>%
  group_by(country, year) %>%
  # Election-level totals, repeated on every row of the election
  mutate(new_vote = sum(new_vote, na.rm = TRUE),
         gnew_vote = sum(gnew_vote, na.rm = TRUE),
         old_vote = sum(old_vote, na.rm = TRUE)) %>%
  distinct(country, year, country_year, new_vote, gnew_vote, old_vote) %>%
  ungroup() %>%
  filter(!is.na(year)) %>%
  transmute(country_year,
            org_vol_all = (new_vote + old_vote) / 2,
            org_vol_gnew = (gnew_vote + old_vote) / 2)

# The lookup table is only needed for the pipeline above.
rm(iso2_lookup)

write.csv(long.dat,
          file = paste0(path, "syntax/Harvard_dataverse/org_vol_scores.csv"),
          row.names = FALSE)
```


# Calculating volatility scores

```{r, warning=FALSE, message=FALSE}

ballanced <- read.csv(paste0(path, "syntax/Harvard_dataverse/ballanced_dat.csv"))

# Per election (country_year) and issue:
#   possal       - position times salience of each party
#   possal_cen   - possal minus the mean possal of the other parties
#   weightedmean - perc_party-weighted mean of possal
# The result stays grouped by country_year and issue_cat.
pos_sal_cen <- ballanced %>%
  group_by(country_year, issue_cat) %>%
  # filter(perc_party!=0 & !is.na(perc_party)) %>%
  mutate(
    nr_parties = sum(!is.na(perc_party)),
    possal = party_pos * party_sal,
    # leave-self-out mean: the group total without the party's own value,
    # divided by the number of remaining parties
    possal_cen = possal - (sum(possal, na.rm = TRUE) - possal) / (nr_parties - 1),
    weightedmean = sum(possal * perc_party, na.rm = TRUE) /
      sum(perc_party, na.rm = TRUE)
  ) %>%
  select(iso2code, country, year_election, country_year,
         party_id, party_name_short, perc_party, type,
         issue_cat, issue_string, perc_issue, possal,
         possal_cen, count_issuebyparty, weightedmean)

```

## Continuity for prog. vol. B where possible

```{r, warning=FALSE, message=FALSE}
# mergers, splits, alliances, everything for what cont. can be established

# parties with one child

# Each entry reads "<country_year> <party_id of the matched party>" = party_id
# under which the matched party's rows are re-labelled for that election.
one_child_links <- c(
  "AU_2006 973" = 955,    # LiF ran w SPO
  "AU_2008 955" = 2255,   # NEOS comes from a merger with LiF
  "AU_2013 1429" = 2651,  # Liste PILZ from the Greens
  "FR_2007 658" = 2399,   # DLR/DLF is a split from UMP
  "FR_2012 1539" = 2643,  # Macron comes from PS
  "FR_2012 686" = 2644,   # Melanchon running in '12
  "FR_2012 401" = 2273,   # UDI comes from PR
  "IE_2011 318" = 2619,   # DS was heavily linked to Labor
  "CH_2007 750" = 1213,   # BDP comes from SVP
  "NL_2012 742" = 2641,   # DENK comes from PvdA
  "PT_2015 1273" = 251,   # CDS-PP ran together with PSD
  "ES_2004 894" = 2605,   # CDC ran with CiU
  "ES_2008 894" = 2605,   # CDC ran with CiU
  "ES_2011 894" = 2605,   # CDC ran with CiU
  "ES_2015 2376" = 2606,  # En comu and Podemos
  "ES_2016 2376" = 118,   # PCE/IU ran with Podemos
  "GR_2015 1592" = 1280,  # OP ran with Syriza
  "GR_2015.5 1592" = 1280, # OP ran with Syriza
  "GR_2009 47" = 2091,    # ANEL as a splinter from ND
  "GR_2012 1592" = 2093,  # Dimar as a splinter from Syriza
  "GR_2012.5 1338" = 2407, # Kidiso from Pasok
  "IT_2013 1321" = 693,   # PRC and IdV were running together
  "IT_2008 382" = 1296,   # Ri in coalition with PD
  "IT_2013 596" = 2154,   # FdI split from PDL
  "LV_2006 1656" = 239,   # KDS in coal. with LSDSP
  "LV_2014 2366" = 239,   # KDS in coal. with LRA
  "LV_2010 811" = 662,    # TP in coal. with LPP/LC
  "LV_2014 1666" = 1942,  # RP in coal with Unity
  "LV_2011 662" = 2367,   # ViLa is a succ. of LPP/LC
  "PL_2007 528" = 1679,   # PjN is a split from PiS
  "PL_2007 512" = 1945,   # Palikot was in PO
  "HU_2006 95" = 600,     # Jobbik in coal with MIEP
  "HU_2010 1591" = 1970,  # DK is a split from MSZP
  "RO_2012 5" = 2647,     # ALDE from PC
  "RO_2004 419" = 958,    # PDL from PD
  "RO_2012 958" = 2395    # PMP from PDL
)

# Take the rows of the matched parties and give them the party_id they are
# linked to, so that they can be merged back on (country_year, party_id,
# issue_cat).
org_continuity <- pos_sal_cen %>%
  mutate(matching_with = paste(country_year, party_id)) %>%
  filter(matching_with %in% names(one_child_links)) %>%
  select(country_year, matching_with, issue_cat, possal_cen,
         count_issuebyparty, possal, weightedmean) %>%
  mutate(party_id = unname(one_child_links[matching_with])) %>%
  select(-matching_with)

# The lookup table is only needed to build org_continuity.
rm(one_child_links)

# Attach the linked values (suffix .y) to the full data (suffix .x), keeping
# every row of pos_sal_cen.
pos_sal_cen <- merge(pos_sal_cen, org_continuity,
                     by = c("country_year", "party_id", "issue_cat"),
                     all.x = TRUE)
pos_sal_cen <- pos_sal_cen[order(pos_sal_cen$country,
                                 pos_sal_cen$year_election,
                                 pos_sal_cen$party_id,
                                 pos_sal_cen$issue_cat), ]

# Where a linked value exists it replaces the party's own value; otherwise the
# own value is kept. The suffixed helper columns are dropped afterwards.
pos_sal_cen <- pos_sal_cen %>%
  mutate(possal = coalesce(possal.y, possal.x),
         weightedmean = coalesce(weightedmean.y, weightedmean.x),
         possal_cen = coalesce(possal_cen.y, possal_cen.x),
         count_issuebyparty = coalesce(count_issuebyparty.y,
                                       count_issuebyparty.x)) %>%
  select(-ends_with(".y"), -ends_with(".x"))

rm(org_continuity)

# parties with two children

# Second link for matched parties that are linked to more than one party_id.
# Each entry reads "<country_year> <party_id of the matched party>" = party_id
# under which the matched party's rows are re-labelled for that election.
second_child_links <- c(
  "FR_2007 658" = 401,    # PR supp. UMP
  "GR_2009 47" = 2094,    # Disy from ND
  "GR_2015 1592" = 2596,  # LE from Syriza
  "IT_2013 1321" = 465,   # PRC in SEL
  "HU_2010 1591" = 2318   # Egyutt from MSZP
)

org_continuity <- pos_sal_cen %>%
  mutate(matching_with = paste(country_year, party_id)) %>%
  filter(matching_with %in% names(second_child_links)) %>%
  select(country_year, matching_with, issue_cat, possal_cen,
         count_issuebyparty, possal, weightedmean) %>%
  mutate(party_id = unname(second_child_links[matching_with])) %>%
  select(-matching_with)

# The lookup table is only needed to build org_continuity.
rm(second_child_links)

# Attach the linked values (suffix .y) to the full data (suffix .x), keeping
# every row of pos_sal_cen.
pos_sal_cen <- merge(pos_sal_cen, org_continuity,
                     by = c("country_year", "party_id", "issue_cat"),
                     all.x = TRUE)
pos_sal_cen <- pos_sal_cen[order(pos_sal_cen$country,
                                 pos_sal_cen$year_election,
                                 pos_sal_cen$party_id,
                                 pos_sal_cen$issue_cat), ]

# Where a linked value exists it replaces the party's own value; otherwise the
# own value is kept. The suffixed helper columns are dropped afterwards.
pos_sal_cen <- pos_sal_cen %>%
  mutate(possal = coalesce(possal.y, possal.x),
         weightedmean = coalesce(weightedmean.y, weightedmean.x),
         possal_cen = coalesce(possal_cen.y, possal_cen.x),
         count_issuebyparty = coalesce(count_issuebyparty.y,
                                       count_issuebyparty.x)) %>%
  select(-ends_with(".y"), -ends_with(".x"))

rm(org_continuity)
```

## Programmatic volatility B scores

```{r, warning=FALSE, message=FALSE}
# Programmatic volatility B: per election, the sum over parties and issues of
# the absolute change in possal_cen since the party's previous election,
# weighted by perc_party and perc_issue.
prog_vol_B <- pos_sal_cen %>%
  group_by(iso2code, party_id, issue_cat) %>%
  arrange(year_election, .by_group = TRUE) %>%
  mutate(delta_possal_cen = abs(possal_cen - lag(possal_cen))) %>%
  # The missings in possal_cen are due to parties which are only in the first
  # election and then disappear (their last election is also the first in the
  # sample).
  filter(type %in% c("Type B", "Last elec bfr dying", "First Election")) %>%
  group_by(iso2code, year_election) %>%
  mutate(progvolB = sum(delta_possal_cen * perc_party * perc_issue,
                        na.rm = TRUE)) %>%
  distinct(iso2code, year_election, country_year, progvolB) %>%
  arrange(year_election, .by_group = TRUE) %>%
  group_by(iso2code) %>%
  # The first row of every country is blanked out.
  mutate(progvolB = if_else(row_number() == 1, NA_real_, progvolB))
```

## Calculating volatility A

```{r, warning=FALSE, message=FALSE}

# Weighted mean of every issue at the previous election of the same country;
# rows without a previous election are dropped.
lagged_means <- pos_sal_cen %>%
  distinct(iso2code, year_election, issue_cat, weightedmean) %>%
  group_by(iso2code, issue_cat) %>%
  arrange(year_election, .by_group = TRUE) %>%
  mutate(lag_weightedmean = lag(weightedmean)) %>%
  ungroup() %>%
  filter(!is.na(lag_weightedmean)) %>%
  select(-weightedmean)

# Rows typed "Type A (new)" (also when combined with "Last elec bfr dying"):
# weighted absolute distance between the party's possal and the previous
# election's weighted mean, summed per election.
prog_vol_A_gn <- pos_sal_cen %>%
  merge(lagged_means,
        by = c("iso2code", "year_election", "issue_cat"),
        all.x = TRUE) %>%
  mutate(delta_possal = abs(possal - lag_weightedmean) * perc_party * perc_issue) %>%
  filter(type %in% c("Type A (new)", "Type A (new) & Last elec bfr dying")) %>%
  group_by(iso2code, year_election) %>%
  mutate(progvolA_new = sum(delta_possal, na.rm = TRUE)) %>%
  select(year_election, iso2code, country_year, progvolA_new) %>%
  distinct()

# Weighted mean of every issue at the next election of the same country; rows
# without a following election are dropped.
lead_means <- pos_sal_cen %>%
  distinct(iso2code, year_election, issue_cat, weightedmean) %>%
  group_by(iso2code, issue_cat) %>%
  arrange(year_election, .by_group = TRUE) %>%
  mutate(lead_weightedmean = lead(weightedmean)) %>%
  ungroup() %>%
  filter(!is.na(lead_weightedmean)) %>%
  select(-weightedmean)

# Rows typed "Last elec bfr dying" (also when combined with "Type A (new)"):
# weighted absolute distance between the party's possal and the next
# election's weighted mean, summed per election.
prog_vol_A_old <- pos_sal_cen %>%
  merge(lead_means,
        by = c("iso2code", "year_election", "issue_cat"),
        all.x = TRUE) %>%
  mutate(delta_possal = abs(possal - lead_weightedmean) * perc_party * perc_issue) %>%
  filter(type %in% c("Last elec bfr dying", "Type A (new) & Last elec bfr dying")) %>%
  group_by(iso2code, year_election) %>%
  mutate(progvolA_old = sum(delta_possal, na.rm = TRUE)) %>%
  select(year_election, iso2code, country_year, progvolA_old) %>%
  distinct()

# Every election in the data, so that elections without such rows are kept
# (with a missing score) in the full merge below.
all_cntry_elec <- pos_sal_cen %>%
  distinct(iso2code, year_election, country_year)

# The reference point is the current election, that's the reason to lag it:
# the score is moved to the following election of the same country.
prog_vol_A_old <- merge(prog_vol_A_old, all_cntry_elec, all = TRUE) %>%
  group_by(iso2code) %>%
  arrange(year_election, .by_group = TRUE) %>%
  mutate(progvolA_old = lag(progvolA_old))

# Volatility A per election: the "new" plus the "old" component, a missing
# component counting as 0.
prog_vol_A <- merge(prog_vol_A_gn, prog_vol_A_old, all = TRUE) %>%
  mutate(progvolA = coalesce(progvolA_new, 0) + coalesce(progvolA_old, 0)) %>%
  select(-progvolA_new, -progvolA_old)

# Drop the intermediate objects of this chunk.
rm(lagged_means, prog_vol_A_gn, lead_means, pos_sal_cen, all_cntry_elec,
   prog_vol_A_old)
```

## Programmatic volatility scores

```{r, warning=FALSE, message=FALSE}

# Programmatic volatility per election = volatility B + volatility A. The
# first row of every country is set to missing. The result stays grouped by
# iso2code.
prog_vol <- merge(prog_vol_A, prog_vol_B, all = TRUE) %>%
  group_by(iso2code) %>%
  arrange(year_election, .by_group = TRUE) %>%
  mutate(prog_vol = if_else(row_number() == 1, NA_real_,
                            progvolB + progvolA)) %>%
  select(-progvolB, -progvolA)

rm(prog_vol_A, prog_vol_B)
```

## Merging with organizational volatility

```{r, warning=FALSE, message=FALSE}
org_vol <- read.csv(paste0(path, "syntax/Harvard_dataverse/org_vol_scores.csv"))

# Election-level data set: only elections present in both sources are kept
# (merge on the shared columns without all = TRUE).
volatility <- merge(org_vol, prog_vol) %>%
  mutate(
    # Numeric code for the country group used as point shape in the figures;
    # countries in none of the three lists get NA.
    region = case_when(
      iso2code %in% c("AU", "CH", "DE", "FR", "IE", "NL", "UK") ~ 1,
      iso2code %in% c("ES", "GR", "IT", "PT") ~ 2,
      iso2code %in% c("HU", "LV", "PL", "RO") ~ 3
    ),
    # Min-max rescaling of programmatic volatility to the range [0, 1]
    prog_vol = (prog_vol - min(prog_vol, na.rm = TRUE)) /
      (max(prog_vol, na.rm = TRUE) - min(prog_vol, na.rm = TRUE))
  ) %>%
  group_by(iso2code) %>%
  arrange(year_election, .by_group = TRUE) %>%
  # Drops the first election of a country when it has no programmatic score
  # and an organizational volatility of exactly 0.
  filter(row_number() != 1 | !is.na(prog_vol) | org_vol_all != 0) %>%
  ungroup() %>%
  select(iso2code, region, year_election, country_year,
         org_vol_all, org_vol_gnew, prog_vol)

# Country-level data set: the mean of every remaining column (region,
# org_vol_all, org_vol_gnew, prog_vol) across the elections of a country.
# The function is passed directly because funs() is deprecated in dplyr; the
# column names and values are the same as with funs(mean).
# NOTE(review): mean() is called without na.rm, so a country with a missing
# value in a column gets NA for that column - confirm that this is intended.
volatility_cntry <- volatility %>%
  select(-year_election, -country_year) %>%
  group_by(iso2code) %>%
  summarise_all(mean)

```

```{r, warning=FALSE, message=FALSE}

# Association between extra-system volatility (genuinely new parties) and
# programmatic volatility at the election level, using three correlation
# measures (Kendall, Spearman, Pearson).
cor.test(volatility$org_vol_gnew, volatility$prog_vol, method="kendall")
cor.test(volatility$org_vol_gnew, volatility$prog_vol, method="spearman")
cor.test(volatility$org_vol_gnew, volatility$prog_vol, method="pearson")

# The same three tests on the country-level means.
cor.test(volatility_cntry$org_vol_gnew, volatility_cntry$prog_vol, method="kendall")
cor.test(volatility_cntry$org_vol_gnew, volatility_cntry$prog_vol, method="spearman")
cor.test(volatility_cntry$org_vol_gnew, volatility_cntry$prog_vol, method="pearson")

```

# Figure 1 in the paper

```{r, warning=FALSE, message=FALSE}


# Builds one panel of Figure 1: org_vol_gnew (x) against prog_vol (y), with
# dashed lines at the two medians, repelled text labels and the point shape
# set by region.
#   plot_data     - data frame with org_vol_gnew, prog_vol and region
#   label_mapping - aes() mapping that supplies the text label
#   label_size    - text size of the labels
fig1_panel <- function(plot_data, label_mapping, label_size) {
  ggplot(plot_data, aes(org_vol_gnew, prog_vol)) +
    geom_hline(aes(yintercept = median(prog_vol)),
               linetype = "dashed", color = "gray45", alpha = 0.5) +
    geom_vline(aes(xintercept = median(org_vol_gnew)),
               linetype = "dashed", color = "gray45", alpha = 0.5) +
    geom_text_repel(label_mapping, size = label_size, segment.color = NA,
                    point.padding = unit(1, "points"), nudge_y = 1.0E-6) +
    geom_point(aes(shape = factor(region))) +
    labs(x = "Extra-system volatility", y = "Programmatic volatility") +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5),
          legend.position = "none") +
    ylim(0, 1) + xlim(0, 25)
}

# Panel a: country level, panel b: election level
scatter_ctr <- fig1_panel(volatility_cntry, aes(label = iso2code), 3.5)
scatter_elc <- fig1_panel(volatility, aes(label = country_year), 2.5)

# The builder is only needed for the two panels above.
rm(fig1_panel)

# Each panel is written as png (default resolution), jpg and eps.
ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1a.png"),
       plot = scatter_ctr, width = 5, height = 5)
ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1a.jpg"),
       plot = scatter_ctr, width = 5, height = 5, dpi = 400)
ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1a.eps"),
       plot = scatter_ctr, width = 5, height = 5, dpi = 400)

ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1b.png"),
       plot = scatter_elc, width = 5, height = 5)
ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1b.jpg"),
       plot = scatter_elc, width = 5, height = 5, dpi = 400)
ggsave(filename = paste0(path, "syntax/Harvard_dataverse/figure_1b.eps"),
       plot = scatter_elc, width = 5, height = 5, dpi = 400)

# Show both panels in the rendered document
scatter_ctr
scatter_elc
```

# Figure - appendix

```{r, warning=FALSE, message=FALSE}

# Builds one panel of the appendix figure: org_vol_gnew (x) against
# org_vol_all (y) with a linear fit line, repelled text labels and the point
# shape set by region.
#   plot_data     - data frame with org_vol_gnew, org_vol_all and region
#   label_mapping - aes() mapping that supplies the text label
#   label_size    - text size of the labels
#   panel_title   - title shown above the panel
org_vol_panel <- function(plot_data, label_mapping, label_size, panel_title) {
  ggplot(plot_data, aes(org_vol_gnew, org_vol_all)) +
    geom_smooth(color = "gray80", method = "lm",
                se = FALSE, show.legend = FALSE) +
    geom_text_repel(label_mapping, size = label_size, segment.color = NA,
                    point.padding = unit(1, "points"), nudge_y = 1.0E-6) +
    geom_point(aes(shape = factor(region))) +
    labs(x = "Extra-system volatility (genuinely new)",
         y = "Extra-system volatility (all new)") +
    theme_classic() +
    ggtitle(panel_title) +
    theme(plot.title = element_text(hjust = 0.5),
          legend.position = "none") +
    xlim(0, 20) + ylim(0, 50)
}

two_volata <- org_vol_panel(volatility_cntry, aes(label = iso2code),
                            3.5, "Country Level")
two_volat_elec_level <- org_vol_panel(volatility, aes(label = country_year),
                                      2.5, "Election Level")

# The builder is only needed for the two panels above.
rm(org_vol_panel)

# Both panels side by side
p <- plot_grid(two_volata, two_volat_elec_level)
p

save_plot(filename = paste0(path, "syntax/Harvard_dataverse/appendix_figure_1.png"),
          plot = p, ncol = 2, base_width = 5, base_height = 5)

```